
"""
nllb 使用fasttext进行语种检测
nllb-200-distilled-600M语言缩写对照表，见README.md
"""

import fasttext
from threading import RLock


class LanguageDetector(object):
    """Process-wide singleton around a fastText language-identification model.

    The first instantiation loads the model from ``MODEL_PATH``; every later
    instantiation returns that same object without touching the disk again.
    Constructor arguments are accepted for compatibility but are not used.
    """

    # Serializes first-time construction so the model is loaded only once.
    lock = RLock()

    # Model binary loaded on first instantiation; may be overridden on the
    # class (or a subclass) before the first instance is created.
    MODEL_PATH = '/root/autodl-tmp/fasttext-language-identification/model.bin'

    def __new__(cls, *args, **kwargs):
        """Return the shared instance, loading the model on first use.

        Raises:
            Whatever ``fasttext.load_model`` raises when the model cannot be
            loaded. In that case no instance is cached, so a later call
            retries the load instead of handing out a model-less object.
        """
        with LanguageDetector.lock:
            if not hasattr(LanguageDetector, "_instance"):
                # Load before publishing the instance: if loading fails,
                # `_instance` must stay unset so the singleton is never left
                # half-initialized.
                model = fasttext.load_model(cls.MODEL_PATH)
                instance = object.__new__(cls)
                cls.model = model
                LanguageDetector._instance = instance
        return LanguageDetector._instance

    def detect(self, text: str) -> tuple:
        """Predict the language of ``text``.

        Args:
            text: The text to classify. Line breaks are replaced with spaces
                because fastText's ``predict`` handles one line at a time and
                raises ``ValueError`` on input containing ``'\\n'``.
                Non-``str`` input is forwarded to the model unchanged.

        Returns:
            The value returned by ``self.model.predict`` -- for a fastText
            model, a ``(labels, probabilities)`` pair.
        """
        if isinstance(text, str):
            text = text.replace('\n', ' ')
        return self.model.predict(text)


if __name__ == '__main__':
    # Manual smoke test: print the raw prediction for one short sample in
    # each of several languages (English, Chinese, Lao, Thai, Malay,
    # Vietnamese).
    detector = LanguageDetector()
    samples = (
        'hello, world',
        '中文语句。',
        'ເຄື່ອງຄິດເລກຂອງນັກການເມືອງອາເມຣິກາແມ່ນຄິດໄລ່ “ພາສີລັກສະນະເທົ່າທຽມກັນ” ອອກມາແບບແນວໃດ?',
        'บนอินเทอร์เน็ตมีแค่ 70 กว่าบรรทัดเท่านั้นใช่ไหม',
        'Apa yang anda katakan mungkin tidak pernah berlaku. ',
        'Tinh Chất Dưỡng Sáng Da Cô Đặc Bottega Verde',
    )
    for sample in samples:
        prediction = detector.detect(sample)
        print(prediction)